// *************************************
// EFMP Fleet
// EFMP_ConstructIVMatch.do
// *************************************

/*** START CODE ***/


use "${EFMPFleetData}/NN_TripDiffPreTrendGraphData.dta", clear


// Treatment indicator: product of the dac and aqmd variables.
generate treat = dac * aqmd


// One instrument per PL band: the band's *_frac variable interacted with treat.
foreach band in PL225 PL300 PL400 {
	generate IV_`band' = `band'_frac * treat
}

// Zip-level denominators for the EFMP fraction / average-subsidy instruments.
//   mszip     : mean of cnt_total within each zip*post cell
//   loo_mszip : the same mean leaving out the current quarter; the 10 and 9
//               below hard-code 10 quarters per zip*post cell

bysort zip post: egen mszip = mean(cnt_total)
generate loo_mszip = (mszip * 10 - cnt_total)/9

// Total subsidy in the zip*quarter = average subsidy times total count.
generate total_subsidy_efmp_pu = avgsubsidy_efmp_pu * cnt_total

// Each instrument is the ratio for treat==1 rows and 0 everywhere else.
generate IV_mszip_frac_efmp_pu = cond(treat==1, cntEFMP_PU / mszip, 0)
generate IV_loom_zip_frac_efmp_pu = cond(treat==1, cntEFMP_PU / loo_mszip, 0)
generate IV_mszip_avgsubsidy_efmp_pu = cond(treat==1, total_subsidy_efmp_pu / mszip, 0)
generate IV_loom_zip_avgsubsidy_efmp_pu = cond(treat==1, total_subsidy_efmp_pu / loo_mszip, 0)

// Instruments whose denominator is average sales within AQMD*DAC*runquarter.
//   msaqd       : mean of cnt_total in the cell
//   count_msaqd : number of non-missing cnt_total values in the cell
// The "loom_aqd" versions remove the row's own cnt_total from the cell mean.

bysort aqmd dac runquarter: egen msaqd = mean(cnt_total)
bysort aqmd dac runquarter: egen count_msaqd = count(cnt_total)
tabulate aqmd dac, summarize(count_msaqd)

// Leave-own-zip cell mean: (cell total - own count) / (cell size - 1).
local loo_aqd_mean "((msaqd*count_msaqd - cnt_total)/(count_msaqd - 1))"

// Ratio for treat==1 rows, 0 otherwise.
generate IV_msaqd_frac_efmp_pu = cond(treat==1, cntEFMP_PU / msaqd, 0)
generate IV_msaqd_avgsubsidy_efmp_pu = cond(treat==1, total_subsidy_efmp_pu / msaqd, 0)
generate IV_loom_aqd_frac_efmp_pu = cond(treat==1, cntEFMP_PU / `loo_aqd_mean', 0)
generate IV_loom_aqd_avgsubsidy_efmp_pu = cond(treat==1, total_subsidy_efmp_pu / `loo_aqd_mean', 0)

//Generate IV: frac_efmp and avgsubsidy that use the zip-level leave-one-out mean
//(loo_mszip) as denominator, rescaled by a quarter-specific factor built from the
//AQMD*DAC*runquarter total that leaves out the own zip.

gen IV_frac_efmp_pu = 0
gen IV_avgsubsidy_efmp_pu = 0

//Leave-own-zip total of cnt_total in the AQMD*DAC*runquarter cell
gen LOO_aqd_tot_qtr_cnt = (msaqd*count_msaqd - cnt_total)
//Sum of that total over the rows of each zip*post cell; dividing by 10 hard-codes
//10 quarters per cell (same assumption as loo_mszip)
bys zip post: egen LOO_aqd_tot_post_cnt = total(LOO_aqd_tot_qtr_cnt)
gen LOO_adq_mean_post_cnt = LOO_aqd_tot_post_cnt/10
//Quarter total relative to the cell mean; only defined for post==1
gen scalefactor = (LOO_aqd_tot_qtr_cnt/LOO_adq_mean_post_cnt) if post==1

sum scalefactor

//scalefactor is missing when post!=1, so restrict the replace to post==1.
//Without the restriction treated pre-period rows are overwritten with missing,
//and missing compares as larger than any number in later "> cap" tests.
//Pre-period rows keep the initial value 0.
//NOTE(review): assumes 0 is the intended pre-period value - confirm.
replace IV_frac_efmp_pu = cntEFMP_PU / (loo_mszip*scalefactor) if treat==1 & post==1
replace IV_avgsubsidy_efmp_pu = total_subsidy_efmp_pu / (loo_mszip*scalefactor) if treat==1 & post==1

sum IV*

//Truncate all IVs in a similar fashion to the truncation for frac_efmp and avg_subsidy
//Stata orders missing values above every number, so an unguarded "`x'>cap" test
//would overwrite missing IVs (e.g. from a zero or missing denominator) with the cap.
//The !missing() guard leaves those rows missing instead.

//Fraction-type IVs are capped at 1
foreach x in IV_loom_zip_frac_efmp_pu IV_mszip_frac_efmp_pu IV_msaqd_frac_efmp_pu IV_loom_aqd_frac_efmp_pu IV_frac_efmp_pu {
	replace `x' = 1 if `x'>1 & !missing(`x')
}

//Average-subsidy IVs are capped at 5000
foreach x in IV_loom_zip_avgsubsidy_efmp_pu IV_mszip_avgsubsidy_efmp_pu IV_msaqd_avgsubsidy_efmp_pu IV_loom_aqd_avgsubsidy_efmp_pu IV_avgsubsidy_efmp_pu {
	replace `x' = 5000 if `x'>5000 & !missing(`x')
}

sum IV*

corr IV*

// Build the zip*quarter working file used for the nearest-neighbour IVs.
// A string copy of zip becomes geoid, the key of the centroid file below.
drop if zip==.
tostring zip, generate(zip_label2)
keep zip zip_label2 runquarter post scaqmd_dum aqmd dac treat PL* MaxCES IV* frac_efmp_pu avgsubsidy_efmp_pu
rename zip_label2 geoid
sort zip runquarter

// Attach zip centroids; rows found only in the centroid file are discarded and
// rows without a centroid match are flagged with nolatlon = 1.
merge m:1 geoid using  "${EFMPFleetData}/Gaz_zcta_2018.dta"
drop if _merge==2
generate nolatlon = (_merge==1)
tabulate _merge nolatlon
drop _merge aland awater*

// Snapshot of the zip*quarter data; it is reloaded and overwritten later on.
tempfile zipqtrlvl
save `zipqtrlvl', replace


//Hard match on AQMD, DAC
//Identify matches based on demographics
//Assign lambda, avg subsidy in matched location as instrument
//IVs 1 - 5: Best N matches
//IV 6: Match excluding neighbors
//IV 7: Best N matches excluding nearby zips
// ----------------------------


//Collapse to one row per zip (means of the matching covariates, the hard-match
//variables and the intpt* centroid variables), then duplicate every row.
//first = 1 on the first copy of a zip and 0 on the second copy; it is passed to
//teffects as the treatment variable, so each zip appears once in each arm.
collapse (mean) PL* MaxCES aqmd dac intpt*, by(zip)
expand 2
sort zip
by zip: gen first = zip[_n]~=zip[_n-1]

//Placeholder outcome variable for teffects.
//NOTE(review): no seed is set, so random differs between runs; presumably the
//matches depend only on the covariates and not on the outcome - confirm.
gen random = runiform()

//To extract matches, match the "first zip list" to the "second zip list".
//Note: drop first match - each zip will match to itself

//NNmatchlist is extended below but not referenced again in this section.
local NNmatchlist ""
local HardMatchVars "aqmd dac"
local NNmatchvars "MaxCES PL*"

//Exact match on aqmd and dac, nearest-neighbour match on MaxCES and PL*.
//gen(nn_Demog_) stores the observation numbers of the matches and the predict
//call stores the corresponding match distances in nn_Demog_prox*.
//NOTE(review): later code reads nn_Demog_1 through nn_Demog_7 while nn(6) is
//requested here; presumably a 7th index variable only exists when ties create
//extra matches - confirm.
teffects nnmatch (random `NNmatchvars') (first) , ///
	ematch(`HardMatchVars') ate biasadj(`NNmatchvars') nn(6) gen(nn_Demog_) dmv vce(robust)
predict nn_Demog_prox*, distance
local NNmatchlist "`NNmatchlist' Demog"


// Translate each match index (an observation number) into the matched zip code.
forvalues m = 1/7 {
	generate nn`m'_zip = zip[nn_Demog_`m']
}

// Check the hard match: the two counts per match report how many rows have a
// matched aqmd / dac that differs from the row's own value.
forvalues m = 1/7 {
	generate nn`m'_aqmd = aqmd[nn_Demog_`m']
	generate nn`m'_dac = dac[nn_Demog_`m']
	count if aqmd ~= nn`m'_aqmd
	count if dac ~= nn`m'_dac
}
drop *_aqmd *_dac


// Match 1 is treated as the self match and removed.
drop nn1_zip nn_Demog_1 nn_Demog_prox1
// Keep one row per zip (the first copy created by the earlier expand).
tabulate first
keep if first==1
// Remove the helper variables that were only needed for the matching step.
drop first nn_Demog_? random PL* MaxCES aqmd dac

// Attach the per-zip match list and match distances to the zip*quarter file.
merge 1:m zip using `zipqtrlvl'
tabulate _merge
drop _merge
save `zipqtrlvl', replace

// For matches 2 through 7, bring in the matched zip's location, frac_efmp_pu
// and avgsubsidy_efmp_pu for the same runquarter.

local NNvarlist "zip intptlat intptlong frac_efmp_pu avgsubsidy_efmp_pu"
macro list

forvalues k = 2/7 {
	// Lookup table: one row per zip*runquarter with every variable prefixed nn`k'_
	use `zipqtrlvl', clear
	keep `NNvarlist' runquarter
	foreach v in `NNvarlist' {
		rename `v' nn`k'_`v'
	}
	// Join on the matched zip and quarter; lookup rows nobody matched to are dropped
	merge 1:m nn`k'_zip runquarter using `zipqtrlvl'
	tabulate _merge
	drop if _merge==1
	drop _merge
	save `zipqtrlvl', replace
}

//Generate distances (in miles) between each zip centroid and the centroid of each match
//The longitude variables are written out in full (intptlong, nn`x'_intptlong), the
//names used when the match variables were created, so the call does not depend on
//variable abbreviation being enabled.

forvalues x = 2/7 {
	geodist intptlat intptlong nn`x'_intptlat nn`x'_intptlong, gen(nn`x'_dist) miles
	label var nn`x'_dist "Distance to NN`x' (miles)"
}
//Distance diagnostics for rows with dac==1 and aqmd==1
sum *dist if dac==1 & aqmd==1
sum nn2_dist if dac==1 & aqmd==1, detail
graph twoway histogram nn2_dist if dac==1 & aqmd==1


// Instruments: unweighted average of the outcome over the first N matches
// (matches 2 .. N+1, since match 1 was removed as the self match), N = 1..5.

foreach outcome in frac_efmp_pu avgsubsidy_efmp_pu {
	// running holds the sum "nn2_x + nn3_x + ..." built up one match at a time
	local running ""
	forvalues k = 1/5 {
		local j = `k' + 1
		if `k' == 1 local running "nn`j'_`outcome'"
		else local running "`running' + nn`j'_`outcome'"
		generate IV_`k'NN_`outcome' = (`running')/`k'
	}
}

// (Averages of the first N neighbours weighted by inverse "match distance" are
// not constructed in this section.)

// IV 8: shift-share. The AQMD*DAC*runquarter mean of the outcome interacted
// with PL225_frac, for rows with dac==1, aqmd==1 and post==1; 0 elsewhere.

foreach outcome in frac_efmp_pu avgsubsidy_efmp_pu {
	generate IV_SS_PL225_`outcome' = 0
	bysort aqmd dac runquarter: egen mean_`outcome' = mean(`outcome')
	tabulate runquarter treat if post==1, summarize(mean_`outcome')
	replace IV_SS_PL225_`outcome' = mean_`outcome' * PL225_frac if dac == 1 & aqmd == 1 & post == 1
}

bysort treat: summarize IV_* dac aqmd post

keep zip frac_efmp_pu avgsubsidy_efmp_pu runquarter IV*

// Diagnostic regressions: each outcome on every candidate instrument
// separately, then on all of them jointly, with robust standard errors.
foreach depvar in frac_efmp_pu avgsubsidy_efmp_pu {
	foreach iv of varlist IV_PL??? IV*`depvar' {
		regress `depvar' `iv', vce(robust)
	}
	regress `depvar' IV_PL??? IV*`depvar', vce(robust)
}


// Keep only the identifiers and the instruments in the saved file.
keep zip runquarter IV*

save  "${EFMPFleetData}/NN_IVConstruct.dta", replace
